# Policy choice after Panel 1, for Study 2 in 
# Voters Use Campaign Finance Transparency and Compliance Information
# Abby K. Wood
# Political Behavior, 2021

# This file contains the code used to select the two issues -- immigration and
# sex ed -- used in Study 2

# set working directory
#setwd()


# install packages
#install.packages("foreign")
#install.packages("car")

library(foreign)
library(car)

# Load the replication data; the code below expects it to provide `issues`.
load("issue_selection_replication.RData")

# Keep respondents flagged as complete, then drop the cases that are still
# missing an Abortion, SexEd, or SupVax answer despite that flag.
x <- subset(issues, complete == 1)
x <- subset(x, !is.na(Abortion) & !is.na(SexEd) & !is.na(SupVax))

# Leaners who answered "I don't know" are left out of the density plots.
# (They still need a wave 2 assignment; that is handled separately on the
# full data further down.)
x <- subset(x, PIDlean != "I don't know")

# 7-point party ID, from 7 = Strong Democrat down to 1 = Strong Republican,
# with 4 = leans toward neither party. The tests run in this order and the
# first match wins; a respondent matching none of the labels gets NA.
PIDnew <- with(x, ifelse(
  PIDdem == "Strong Democrat", 7,
  ifelse(
    PIDdem == "Not very strong Democrat", 6,
    ifelse(
      PIDlean == "Democratic Party", 5,
      ifelse(
        PIDlean == "Neither", 4,
        ifelse(
          PIDlean == "Republican Party", 3,
          ifelse(
            PIDrep == "Not very strong Republican", 2,
            ifelse(PIDrep == "Strong Republican", 1, NA)
          )
        )
      )
    )
  )
))

x$PIDnew <- PIDnew

# Completion-time quantiles in 5% steps, used to pick the speeder cutoff.
quantile(x$Duration..in.seconds., probs = seq(0, 0.95, by = 0.05))
# Drop the ridiculously fast bottom 5% (141 seconds is presumably the 5%
# quantile read off the output above -- confirm before reusing on other data).
x <- subset(x, Duration..in.seconds. > 141)

# Looking for an "important" and "less important" issue to choose. Here we ideally
# want inter-partisan agreement on how they rank these issues.

# first, visual inspection

# Overlay the importance-rating densities of Republicans (PIDnew < 4, red)
# and Democrats (PIDnew > 4, blue) on a single set of axes.
#
#   ratings           numeric importance ratings, one per respondent
#   pid               7-point party ID aligned with `ratings` (x$PIDnew)
#   y_max             upper limit of the density axis
#   main              plot title ("" for none)
#   add_independents  if TRUE, also draw respondents with PIDnew == 4 in green
#
# The first curve sets up the axes and later curves are added with lines().
# Stacking whole plots with par(new = TRUE) would print each plot's
# auto-generated title and x-label on top of the previous one.
plot_importance_by_party <- function(ratings, pid, y_max, main = "",
                                     add_independents = FALSE) {
  plot(density(ratings[pid < 4]), col = "red", main = main,
       xlab = "Importance rating", xlim = c(-3, 15), ylim = c(0, y_max))
  lines(density(ratings[pid > 4]), col = "blue")
  groups <- c(Republican = "red", Democrat = "blue")
  if (add_independents) {
    lines(density(ratings[pid == 4]), col = "green")
    groups <- c(groups, Independent = "green")
  }
  legend("topright", legend = names(groups), col = unname(groups),
         lty = 1, bty = "n")
  invisible(NULL)
}

# Importance of Abortion
plot_importance_by_party(x$Abortion, x$PIDnew, y_max = 1, main = "Abortion")
ks.test(x$Abortion[x$PIDnew < 4], x$Abortion[x$PIDnew > 4])

# Transportation
plot_importance_by_party(x$Transportation, x$PIDnew, y_max = 1,
                         main = "Transportation")
ks.test(x$Transportation[x$PIDnew < 4], x$Transportation[x$PIDnew > 4])

# Immunization  <-  a possibility - no real diff bw r and d, mean = 6.85
plot_importance_by_party(x$Immunization, x$PIDnew, y_max = 1,
                         main = "Immunization")
ks.test(x$Immunization[x$PIDnew < 4], x$Immunization[x$PIDnew > 4])

# SexEd  <- another possibility - no real diff bw r and d, mean 7.75,
# distribution slightly more uniform than immunization
plot_importance_by_party(x$SexEd, x$PIDnew, y_max = 1, main = "SexEd")
ks.test(x$SexEd[x$PIDnew < 4], x$SexEd[x$PIDnew > 4])

# Immigration (untitled, and the only plot that also shows PIDnew == 4)
plot_importance_by_party(x$Immigration, x$PIDnew, y_max = 0.4, main = "",
                         add_independents = TRUE)
ks.test(x$Immigration[x$PIDnew < 4], x$Immigration[x$PIDnew > 4])

# healthcare
plot_importance_by_party(x$Healthcare, x$PIDnew, y_max = 0.4,
                         main = "Healthcare")
ks.test(x$Healthcare[x$PIDnew < 4], x$Healthcare[x$PIDnew > 4])


# Summary of issue importance by party: one row per issue, with the overall,
# Republican (PIDnew < 4) and Democratic (PIDnew > 4) means plus the
# two-sample KS test and t-test comparing the two partisan groups.

# Build one table row from a vector of importance ratings and the aligned
# 7-point party ID. Each test is run once and both its statistic and its
# p-value are taken from that single result.
# Returns a length-7 numeric vector in the column order of `mytable`.
importance_row <- function(ratings, pid) {
  reps <- ratings[pid < 4]
  dems <- ratings[pid > 4]
  ks <- ks.test(reps, dems)
  tt <- t.test(reps, dems)
  c(mean(ratings), mean(reps), mean(dems),
    ks$statistic, ks$p.value,
    tt$statistic, tt$p.value)
}

mytable <- matrix(ncol = 7, nrow = 6)
# Row names double as the column names of `x` holding each issue's ratings.
rownames(mytable) <- c('Abortion', 'Immigration', 'Immunization',
                       'Transportation', 'SexEd', 'Healthcare')
colnames(mytable) <- c('Overall Mean', 'Republican Mean', 'Dem Mean',
                       'KS test', 'ks p val',
                       't.test R D mean', 't.test pval')

for (issue in rownames(mytable)) {
  mytable[issue, ] <- importance_row(x[[issue]], x$PIDnew)
}

# sexed is clearly not that important to people, so that's a good "low importance"
# issue.  For high importance issue, healthcare and immigration are the two
# leading contenders, but we need to see whether there's separation between R and D
# on those two issues, which is where the next set of analyses comes from.
mytable



# Now checking policy support separation 

# Immigration policy
# note big separation between R and D, and the climb with 
# Ds from very low density at 1 to much higher density at 3 and 4

# Replace the four support labels with the scores 1-4 (in place).
x$SupImmigPolicy <- recode(x$SupImmigPolicy,
                           "'Strongly Oppose' = '1'; 'Oppose' = '2';
                           'Support' = '3'; 'Strongly Support' = '4'")

# Go through as.character() before as.numeric(): if recode() hands back a
# factor, as.numeric() alone returns the internal level codes, which only
# equal the scores when all four categories happen to be present.
x$SupImmigPolicynum <- as.numeric(as.character(x$SupImmigPolicy))

table(x$SupImmigPolicy, x$PIDnew)

# Republicans (PIDnew < 4) in red, Democrats (PIDnew > 4) in blue. The second
# curve is added with lines() so both share one set of axes and annotations.
plot(density(x$SupImmigPolicynum[x$PIDnew < 4]), col = "red",
     ylim = c(0, 0.8), xlim = c(0, 5),
     main = "Support for immigration policy",
     xlab = "Support (1 = Strongly Oppose, 4 = Strongly Support)")
lines(density(x$SupImmigPolicynum[x$PIDnew > 4]), col = "blue")


# note big separation in Sex Ed between R and D
# Replace the four support labels with the scores 1-4 (in place).
x$SupSexEd <- recode(x$SupSexEd,
                     "'Strongly Oppose' = '1'; 'Oppose' = '2';
                     'Support' = '3'; 'Strongly Support' = '4'")

# Go through as.character() before as.numeric(): if recode() hands back a
# factor, as.numeric() alone returns the internal level codes, which only
# equal the scores when all four categories happen to be present.
x$SupSexEdnum <- as.numeric(as.character(x$SupSexEd))

table(x$SupSexEd, x$PIDnew)

# Republicans (PIDnew < 4) in red, Democrats (PIDnew > 4) in blue. The second
# curve is added with lines() so both share one set of axes and annotations.
plot(density(x$SupSexEdnum[x$PIDnew < 4]), col = "red",
     ylim = c(0, 1.3), xlim = c(0, 5), main = "",
     xlab = "Support (1 = Strongly Oppose, 4 = Strongly Support)")
lines(density(x$SupSexEdnum[x$PIDnew > 4]), col = "blue")


# Healthcare -- way less separation, so can't choose healthcare
# Score the four support labels as 1-4 in a separate column, so that
# x$SupHealthcare keeps its original labels for the table below.
x$SupHealthcarenum <- recode(x$SupHealthcare,
                             "'Strongly Oppose' = '1'; 'Oppose' = '2';
                             'Support' = '3'; 'Strongly Support' = '4'")

# Go through as.character() before as.numeric(): if recode() hands back a
# factor, as.numeric() alone returns the internal level codes, which only
# equal the scores when all four categories happen to be present.
x$SupHealthcarenum <- as.numeric(as.character(x$SupHealthcarenum))

table(x$SupHealthcare, x$PIDnew)

# Republicans (PIDnew < 4) in red, Democrats (PIDnew > 4) in blue. The second
# curve is added with lines() so both share one set of axes and annotations.
plot(density(x$SupHealthcarenum[x$PIDnew < 4]), col = "red",
     ylim = c(0, 2), xlim = c(0, 5),
     main = "Support for healthcare policy",
     xlab = "Support (1 = Strongly Oppose, 4 = Strongly Support)")
lines(density(x$SupHealthcarenum[x$PIDnew > 4]), col = "blue")

# (I include the rest of the support plots for completeness only)
# Abortion
# Replace the four support labels with the scores 1-4 (in place).
x$SupAbortion20 <- recode(x$SupAbortion20,
                          "'Strongly Oppose' = '1'; 'Oppose' = '2';
                          'Support' = '3'; 'Strongly Support' = '4'")

# Go through as.character() before as.numeric(): if recode() hands back a
# factor, as.numeric() alone returns the internal level codes, which only
# equal the scores when all four categories happen to be present.
x$SupAbortion20num <- as.numeric(as.character(x$SupAbortion20))
table(x$SupAbortion20, x$PIDnew)

# Republicans (PIDnew < 4) in red, Democrats (PIDnew > 4) in blue. Both
# curves must share one x scale to be comparable: the x range is fixed to
# c(0, 5) like the other support plots, and the second curve is added with
# lines() instead of being drawn as a separate plot with its own x range.
plot(density(x$SupAbortion20num[x$PIDnew < 4]), col = "red",
     ylim = c(0, 1.3), xlim = c(0, 5),
     main = "Support for abortion policy",
     xlab = "Support (1 = Strongly Oppose, 4 = Strongly Support)")
lines(density(x$SupAbortion20num[x$PIDnew > 4]), col = "blue")


# transportation
# Replace the four support labels with the scores 1-4 (in place).
x$SupTransp <- recode(x$SupTransp,
                      "'Strongly Oppose' = '1'; 'Oppose' = '2';
                      'Support' = '3'; 'Strongly Support' = '4'")

# Go through as.character() before as.numeric(): if recode() hands back a
# factor, as.numeric() alone returns the internal level codes, which only
# equal the scores when all four categories happen to be present.
x$SupTranspnum <- as.numeric(as.character(x$SupTransp))

table(x$SupTransp, x$PIDnew)

# Republicans (PIDnew < 4) in red, Democrats (PIDnew > 4) in blue. The second
# curve is added with lines() so both share one set of axes and annotations.
plot(density(x$SupTranspnum[x$PIDnew < 4]), col = "red",
     ylim = c(0, 1.3), xlim = c(0, 5), main = "",
     xlab = "Support (1 = Strongly Oppose, 4 = Strongly Support)")
lines(density(x$SupTranspnum[x$PIDnew > 4]), col = "blue")

# immunization
# Replace the four support labels with the scores 1-4 (in place).
x$SupVax <- recode(x$SupVax,
                   "'Strongly Oppose' = '1'; 'Oppose' = '2';
                   'Support' = '3'; 'Strongly Support' = '4'")

# Go through as.character() before as.numeric(): if recode() hands back a
# factor, as.numeric() alone returns the internal level codes, which only
# equal the scores when all four categories happen to be present.
x$SupVaxnum <- as.numeric(as.character(x$SupVax))
# Drop respondents without a usable score; density() stops on missing values.
x <- subset(x, !is.na(SupVaxnum))


table(x$SupVax, x$PIDnew)

# Republicans (PIDnew < 4) in red, Democrats (PIDnew > 4) in blue. The second
# curve is added with lines() so both share one set of axes and annotations.
plot(density(x$SupVaxnum[x$PIDnew < 4]), col = "red",
     xlim = c(0, 5), ylim = c(0, 1.5), main = "",
     xlab = "Support (1 = Strongly Oppose, 4 = Strongly Support)")
lines(density(x$SupVaxnum[x$PIDnew > 4]), col = "blue")

# Issues chosen were same as analysis in top half of code - immigration and sex ed

# The next step, for anyone hoping to replicate the conjoint, too, was to 
# assign in the conjoint whether a respondent observed the Democratic candidates, 
# who were more  supportive of the immigration and sex ed policies 
# (taking the values "Strongly supports", "supports", or "opposes" on the policies)
# or whether they observed the Republican candidates ("supports", "opposes", 
# "Strongly opposes").  I did that by assigning everyone who was a Strong Dem, 
# Not very strong Dem, Dem Party member, self-described Liberal ("extremely", 
# "slightly", or "liberal") to see the Democratic primary.  If people described 
# themselves as Republicans (as above) or conservatives (as above), they were
# assigned to see candidates in a republican primary.  People who were true 
# independents ("Independent" for party ID and "moderate" for ideology) were 
# randomized into a primary.  

# This is really concrete and practical, but I just created a "wave 2 assignment" 
# vector and saved it as a .txt file with respondent ID tags sent by the survey 
# vendor, and sent it back to them to run survey 2 on the same respondents.

# I'm leaving the code below, but it's truly just for anyone trying to learn 
# how to run a panel; it doesn't go toward anything in the paper.

# divvying up folks into their wave 2 assignment
# Start again from the full `issues` data: the plotting sample `x` dropped the
# "I don't know" leaners and the fastest respondents, but they still need an
# assignment.
xbig <- issues
xbig$wave2assignment <- NA

# Keep respondents flagged as complete, then get rid of the two that made it
# through incomplete.
xbig <- subset(xbig, complete == 1)
xbig <- subset(xbig, !is.na(Abortion) & !is.na(SexEd) & !is.na(SupVax))

# 7-point party ID, from 7 = Strong Democrat down to 1 = Strong Republican,
# with 4 = leans toward neither party. The tests run in this order and the
# first match wins; a respondent matching none of the labels gets NA.
PIDnew <- with(xbig, ifelse(
  PIDdem == "Strong Democrat", 7,
  ifelse(
    PIDdem == "Not very strong Democrat", 6,
    ifelse(
      PIDlean == "Democratic Party", 5,
      ifelse(
        PIDlean == "Neither", 4,
        ifelse(
          PIDlean == "Republican Party", 3,
          ifelse(
            PIDrep == "Not very strong Republican", 2,
            ifelse(PIDrep == "Strong Republican", 1, NA)
          )
        )
      )
    )
  )
))

xbig$PIDnew <- PIDnew

# Step 1: partisans and leaners go to their own party's primary.
xbig$wave2assignment[xbig$PIDnew < 4] <- "R"
xbig$wave2assignment[xbig$PIDnew > 4] <- "D"

# Building blocks for the remaining rules. %in% yields FALSE where the answer
# is missing; a missing answer never selected a row in these assignments, so
# the same rows are picked as with `==`.
no_party_id <- xbig$PIDnew == 4 | is.na(xbig$PIDnew)
is_liberal <- xbig$ideo7 %in%
  c("Liberal", "Slightly Liberal", "Extremely Liberal")
is_conservative <- xbig$ideo7 %in%
  c("Conservative", "Slightly Conservative", "Extremely Conservative")
is_middle <- xbig$ideo7 %in%
  c("Moderate; Middle of the Road", "Haven't thought much about this")
registered_dem <- xbig$partyreg %in% "Democratic Party"
registered_rep <- xbig$partyreg %in% "Republican Party"

# Steps 2-4 run in this order and later steps overwrite earlier ones.
# NOTE: `&` binds tighter than `|`, so the party-registration test applies to
# every respondent, not only to those without a party ID. A registered
# Democrat is set to "D" and a registered Republican to "R" whatever step 1
# decided.
xbig$wave2assignment[(no_party_id & is_liberal) | registered_dem] <- "D"
xbig$wave2assignment[(no_party_id & is_conservative) | registered_rep] <- "R"

# True independents with no ideological lean are flagged for a coin flip
# (this also overrides a registration-based assignment from the two lines
# above).
xbig$wave2assignment[no_party_id & is_middle] <- "randomize"

# Reproducibly send half of the flagged rows to "R" and the rest to "D".
# The seed and the sample() call must stay exactly as they are to reproduce
# the assignment.
rand <- which(xbig$wave2assignment == "randomize")

set.seed(40)
sampR <- sample(rand, size = length(rand) / 2, replace = FALSE)

xbig$wave2assignment[sampR] <- "R"
xbig$wave2assignment[xbig$wave2assignment %in% "randomize"] <- "D"

# Two-column respondent ID / assignment table.
forBovitz <- xbig[, c("RESPONDENT_ID", "wave2assignment"), drop = FALSE]

# then write.table(), in which I saved as a .txt file with sep = "\t" and 
# sent it back to the vendor
